4  Tumor: Post-clustering QC and characterization

4.1 Set up Seurat workspace

# Load libraries
library(data.table)
library(devtools)
library(presto)
library(glmGamPoi)
library(sctransform)
library(Seurat)
library(tidyverse)
library(miQC)   
library(SeuratWrappers)
library(flexmix)
library(SingleCellExperiment)
library(SummarizedExperiment)
library(readxl)
library(fishpond)
library(Matrix)
library(speckle)
library(scater)
library(patchwork)
library(vctrs)
library(alevinQC)
library(harmony)
library(scDblFinder)
library(cellXY)

# Set global options for Seurat v5 objects
options(Seurat.object.assay.version = 'v5')

4.2 Load previously saved clustered object

merged.18279.tumor.singlets <- readRDS("Tumor_scRNA_Part3.rds")

4.3 Set idents to preferred initial clustering resolution

Idents(merged.18279.tumor.singlets) <- merged.18279.tumor.singlets$RNA_snn_res.1
merged.18279.tumor.singlets$seurat_clusters <- merged.18279.tumor.singlets$RNA_snn_res.1
DimPlot(merged.18279.tumor.singlets, reduction="umap.harmony", label=T)

4.4 Plot QC

VlnPlot(merged.18279.tumor.singlets, features = "nCount_RNA", group.by="seurat_clusters",pt.size=0) + NoLegend()

VlnPlot(merged.18279.tumor.singlets, features = "nFeature_RNA", group.by="seurat_clusters",pt.size=0) + NoLegend()

VlnPlot(merged.18279.tumor.singlets, features = "percent.mt", group.by="seurat_clusters",pt.size=0) + NoLegend()

4.5 Identify cursory marker genes of each cluster

Note layers were already joined in previous session

DefaultAssay(merged.18279.tumor.singlets) <- "RNA"

vargenes <- presto::wilcoxauc(merged.18279.tumor.singlets, 'seurat_clusters', seurat_assay = 'RNA')
top_vargenes <- top_markers(vargenes, n = 100, auc_min = 0.5, pct_in_min = 50, pct_out_max = 50)
top_vargenes
# A tibble: 100 × 31
    rank `0`   `1`   `10`  `11`  `12`  `13`  `14`  `15`  `16`  `17`  `18`  `19` 
   <int> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
 1     1 MLANA CCL5  COL1… CRYAB GZMK  MZB1  PTTG1 IFI30 IL32  MS4A1 NKG7  CALD1
 2     2 EDNRB NKG7  COL3… SERP… CD3E  DERL3 TYMS  TYRO… LTB   BANK1 CTSW  IGFB…
 3     3 TYR   CST7  COL1… BANCR CD3D  JCHA… NUCK… LYZ   FOXP3 CD79A TYRO… COL1…
 4     4 CRTA… CD8A  COL6… IFI27 CST7  XBP1  CENPF ENSG… SPOC… CD37  CD247 MYL9 
 5     5 MITF  DUSP2 COL6… ANGP… IL32  IGHG1 UBE2C CST3  CD3E  HLA-… GNLY  TAGLN
 6     6 TYRP1 IL32  DCN   F2R   DUSP2 ENSG… CKS1B PLAUR CTLA4 CD83  CD7   NOTC…
 7     7 GPNMB CD3E  COL6… WARS1 CD2   CD79A BIRC5 FCER… TIGIT HLA-… PRF1  LHFP…
 8     8 APOE  LAG3  C1S   ST3G… RGS1  IGHGP HMGN2 SPI1  CORO… HLA-… GZMA  COL4…
 9     9 GMPR  CD8B  C1R   HMCN1 LBH   IGHG2 H2AZ2 AIF1  PTPRC HLA-… FCER… COL4…
10    10 GPM6B CD3D  VCAN  RHOB  RAC2  FKBP… MIA   TYMP  CD3D  LTB   KLRB1 NR2F2
# ℹ 90 more rows
# ℹ 18 more variables: `2` <chr>, `20` <chr>, `21` <chr>, `22` <chr>,
#   `23` <chr>, `24` <chr>, `25` <chr>, `26` <chr>, `27` <chr>, `28` <chr>,
#   `29` <chr>, `3` <chr>, `4` <chr>, `5` <chr>, `6` <chr>, `7` <chr>,
#   `8` <chr>, `9` <chr>

4.6 Plot genes from slide-tags preprint

Sourced from this preprint

goi <- c("PMEL","MLANA","CCL5","LEF1","FOXP3","PAX5","IGHG3","KCNMA1","ZNF366","CUX2","COL1A1","PLVAP")
VlnPlot(merged.18279.tumor.singlets,features=goi,assay="RNA",layer="data",flip=T,sort=T,stack=T)

4.7 Plot other known markers

Sourced from this paper

tumor <- c("DCT","MLANA","MITF","PMEL","S100A1","TYR","APOC1")
endothelial <- c("PECAM1","VWF")
fibroblast <- c("COL3A1","COL1A1","COL1A2","LUM")
tcell <- c("FGFBP2","KLRD1","CD3E","CD3D","GZMB","XCL2","GZMH","CST7","GZMK","GZMA","IFNG","GNLY","CCL4","NKG7","CCL5","CD8A","CD8B","CTLA4","TNFRSF4","BATF","ITM2A")
mono <- c("LYZ","CD74","CD68")
bcell <- c("MS4A1","CD79A")

VlnPlot(merged.18279.tumor.singlets,features=tumor,assay="RNA",layer="data",flip=T,sort=T,stack=T) +
  VlnPlot(merged.18279.tumor.singlets,features=endothelial,assay="RNA",layer="data",flip=T,sort=T,stack=T) +
  VlnPlot(merged.18279.tumor.singlets,features=fibroblast,assay="RNA",layer="data",flip=T,sort=T,stack=T) +
  VlnPlot(merged.18279.tumor.singlets,features=tcell,assay="RNA",layer="data",flip=T,sort=T,stack=T) +
  VlnPlot(merged.18279.tumor.singlets,features=mono,assay="RNA",layer="data",flip=T,sort=T,stack=T) +
  VlnPlot(merged.18279.tumor.singlets,features=bcell,assay="RNA",layer="data",flip=T,sort=T,stack=T) +
  NoLegend()

4.8 Plot top markers identified and canonical genes as a dotplot

top_vargenes <- top_markers(vargenes, n = 5, auc_min = 0.5, pct_in_min = 50, pct_out_max = 50)
top_markers <- top_vargenes %>%
    select(-rank) %>% 
    unclass() %>% 
    stack() %>%
    pull(values) %>%
    unique() %>%
    .[!is.na(.)]

dotplotmarkers <- unique(c(top_markers,tumor,endothelial,fibroblast,tcell,mono,bcell))

# Compute aggregated expression values of these genes and cluster them to order the figure
rna <- AverageExpression(merged.18279.tumor.singlets,assay="RNA",slot="data")
As of Seurat v5, we recommend using AggregateExpression to perform pseudo-bulk analysis.
First group.by variable `ident` starts with a number, appending `g` to ensure valid variable names
This message is displayed once per session.
rna.sub <- rna$RNA[dotplotmarkers,]
cors.genes <- as.dist(1-cor(as.matrix(t(rna.sub)),method="pearson"))
hc.genes <- hclust(cors.genes)
dotplotmarkers.sorted <- rownames(rna.sub)[hc.genes$order]

# Plot
DotPlot(merged.18279.tumor.singlets,features=dotplotmarkers.sorted,assay="RNA",cols=c("blue","red"),cluster.idents=T) + RotatedAxis()

4.9 Plot expression of key genes in UMAP space

FeaturePlot(merged.18279.tumor.singlets, 
    reduction="umap.harmony", 
    features=c("CD8A","CD4","CTLA4","KLRC1","CD79A","PMEL","MLANA","LYZ","PECAM1","COL3A1","SFN","KRT19"),
    order = T,
    ncol = 2)

4.10 Get session info

sessionInfo()
R version 4.3.2 (2023-10-31)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Rocky Linux 8.10 (Green Obsidian)

Matrix products: default
BLAS/LAPACK: /usr/lib64/libopenblasp-r0.3.15.so;  LAPACK version 3.9.0

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
 [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       

time zone: America/New_York
tzcode source: system (glibc)

attached base packages:
[1] stats4    stats     graphics  grDevices utils     datasets  methods  
[8] base     

other attached packages:
 [1] cellXY_0.99.0               scDblFinder_1.14.0         
 [3] harmony_1.2.0               alevinQC_1.16.1            
 [5] vctrs_0.6.5                 patchwork_1.3.0            
 [7] scater_1.30.1               scuttle_1.12.0             
 [9] speckle_1.0.0               Matrix_1.6-4               
[11] fishpond_2.6.2              readxl_1.4.3               
[13] SingleCellExperiment_1.24.0 SummarizedExperiment_1.32.0
[15] Biobase_2.62.0              GenomicRanges_1.54.1       
[17] GenomeInfoDb_1.38.8         IRanges_2.36.0             
[19] S4Vectors_0.40.2            BiocGenerics_0.48.1        
[21] MatrixGenerics_1.14.0       matrixStats_1.4.1          
[23] flexmix_2.3-19              lattice_0.22-6             
[25] SeuratWrappers_0.3.19       miQC_1.8.0                 
[27] lubridate_1.9.3             forcats_1.0.0              
[29] stringr_1.5.1               dplyr_1.1.4                
[31] purrr_1.0.2                 readr_2.1.5                
[33] tidyr_1.3.1                 tibble_3.2.1               
[35] ggplot2_3.5.1               tidyverse_2.0.0            
[37] Seurat_5.1.0                SeuratObject_5.0.2         
[39] sp_2.1-4                    sctransform_0.4.1          
[41] glmGamPoi_1.12.2            presto_1.0.0               
[43] Rcpp_1.0.13-1               devtools_2.4.5             
[45] usethis_3.0.0               data.table_1.16.2          

loaded via a namespace (and not attached):
  [1] fs_1.6.5                  spatstat.sparse_3.1-0    
  [3] bitops_1.0-9              httr_1.4.7               
  [5] RColorBrewer_1.1-3        profvis_0.4.0            
  [7] tools_4.3.2               utf8_1.2.4               
  [9] R6_2.5.1                  DT_0.33                  
 [11] lazyeval_0.2.2            uwot_0.2.2               
 [13] urlchecker_1.0.1          withr_3.0.2              
 [15] GGally_2.2.1              gridExtra_2.3            
 [17] progressr_0.15.1          cli_3.6.3                
 [19] spatstat.explore_3.2-6    fastDummies_1.7.3        
 [21] labeling_0.4.3            spatstat.data_3.1-4      
 [23] ggridges_0.5.6            pbapply_1.7-2            
 [25] Rsamtools_2.18.0          R.utils_2.12.3           
 [27] parallelly_1.39.0         sessioninfo_1.2.2        
 [29] limma_3.58.1              RSQLite_2.3.8            
 [31] BiocIO_1.12.0             generics_0.1.3           
 [33] gtools_3.9.5              ica_1.0-3                
 [35] spatstat.random_3.2-2     ggbeeswarm_0.7.2         
 [37] fansi_1.0.6               abind_1.4-8              
 [39] R.methodsS3_1.8.2         lifecycle_1.0.4          
 [41] yaml_2.3.10               edgeR_4.0.16             
 [43] SparseArray_1.2.2         Rtsne_0.17               
 [45] blob_1.2.4                grid_4.3.2               
 [47] dqrng_0.4.1               promises_1.3.0           
 [49] crayon_1.5.3              shinydashboard_0.7.2     
 [51] miniUI_0.1.1.1            beachmat_2.18.1          
 [53] cowplot_1.1.3             KEGGREST_1.42.0          
 [55] metapod_1.10.1            pillar_1.9.0             
 [57] knitr_1.45                rjson_0.2.23             
 [59] xgboost_1.7.8.1           future.apply_1.11.3      
 [61] codetools_0.2-20          leiden_0.4.3.1           
 [63] glue_1.8.0                remotes_2.5.0            
 [65] png_0.1-8                 spam_2.11-0              
 [67] org.Mm.eg.db_3.18.0       cellranger_1.1.0         
 [69] gtable_0.3.6              cachem_1.1.0             
 [71] xfun_0.49                 S4Arrays_1.2.0           
 [73] mime_0.12                 survival_3.7-0           
 [75] bluster_1.12.0            statmod_1.5.0            
 [77] ellipsis_0.3.2            fitdistrplus_1.2-1       
 [79] ROCR_1.0-11               nlme_3.1-166             
 [81] bit64_4.5.2               RcppAnnoy_0.0.22         
 [83] irlba_2.3.5.1             vipor_0.4.7              
 [85] KernSmooth_2.23-24        DBI_1.2.3                
 [87] colorspace_2.1-1          nnet_7.3-19              
 [89] ggrastr_1.0.2             tidyselect_1.2.1         
 [91] bit_4.5.0                 compiler_4.3.2           
 [93] BiocNeighbors_1.20.2      DelayedArray_0.28.0      
 [95] plotly_4.10.4             rtracklayer_1.62.0       
 [97] scales_1.3.0              lmtest_0.9-40            
 [99] digest_0.6.37             goftest_1.2-3            
[101] spatstat.utils_3.1-1      rmarkdown_2.29           
[103] XVector_0.42.0            htmltools_0.5.8.1        
[105] pkgconfig_2.0.3           sparseMatrixStats_1.14.0 
[107] fastmap_1.2.0             rlang_1.1.4              
[109] htmlwidgets_1.6.4         shiny_1.9.1              
[111] DelayedMatrixStats_1.24.0 farver_2.1.2             
[113] zoo_1.8-12                jsonlite_1.8.9           
[115] BiocParallel_1.36.0       R.oo_1.27.0              
[117] BiocSingular_1.18.0       RCurl_1.98-1.16          
[119] magrittr_2.0.3            modeltools_0.2-23        
[121] GenomeInfoDbData_1.2.11   dotCall64_1.2            
[123] munsell_0.5.1             viridis_0.6.5            
[125] reticulate_1.35.0         stringi_1.8.4            
[127] zlibbioc_1.48.2           MASS_7.3-60.0.1          
[129] org.Hs.eg.db_3.18.0       plyr_1.8.9               
[131] pkgbuild_1.4.5            ggstats_0.7.0            
[133] parallel_4.3.2            listenv_0.9.1            
[135] ggrepel_0.9.6             deldir_2.0-4             
[137] Biostrings_2.70.3         splines_4.3.2            
[139] tensor_1.5                hms_1.1.3                
[141] locfit_1.5-9.10           igraph_2.1.1             
[143] spatstat.geom_3.2-8       RcppHNSW_0.6.0           
[145] reshape2_1.4.4            ScaledMatrix_1.10.0      
[147] pkgload_1.4.0             XML_3.99-0.17            
[149] evaluate_1.0.1            scran_1.30.2             
[151] BiocManager_1.30.25       tzdb_0.4.0               
[153] httpuv_1.6.15             RANN_2.6.2               
[155] polyclip_1.10-7           future_1.34.0            
[157] scattermore_1.2           rsvd_1.0.5               
[159] xtable_1.8-4              restfulr_0.0.15          
[161] svMisc_1.2.3              RSpectra_0.16-2          
[163] later_1.3.2               viridisLite_0.4.2        
[165] AnnotationDbi_1.64.1      GenomicAlignments_1.38.2 
[167] memoise_2.0.1             beeswarm_0.4.0           
[169] tximport_1.28.0           cluster_2.1.6            
[171] timechange_0.3.0          globals_0.16.3